* ------------------------------------------------------------------
* Session setup: reset Stata, define the folder globals used by the
* rest of the script, pick the graph scheme and load the merged data.
* ------------------------------------------------------------------
clear all
capture log close
set more off

* Working directory: every path global below is relative to it
global mainpath "~/Dropbox"
cd ${mainpath}

* Input data folder
global path_cclpg_data "CCLPG/CCLPG_paper/Data"

* Output and code folders all sit under one project root
local paper_root "Maintenance/cleaning_experiment_full_paper"
foreach sub in estimates graphs tables {
	global path_`sub' "`paper_root'/`sub'"
}
global path_dofiles "`paper_root'/code"

set scheme s1mono

* Merged baseline/endline file used for the figure and the covariates
use "${path_cclpg_data}/baseline_endline_merge.dta", clear

* Variable-source convention. In order of preference we use:
*   (i)   baseline data when the variable is available for the full sample
*         (e.g. leadership and network nominations);
*   (ii)  follow-up data when the variable is plausibly pre-determined
*         (e.g. income, religion);
*   (iii) baseline data in the baseline sample when the variable is clearly
*         endogenous to treatment (e.g. arsenic contamination).

* Households with no caretaker information are coded as not having a caretaker
replace caretaker_hh = 0 if caretaker_hh == .

* Caretakers and distance from a well:
* histogram of walking time (left axis) overlaid with a weighted local
* polynomial fit of caretaker_hh on walking time and its CI (right axis)
local in_sample "success_rate > 0"

twoway ///
	(hist final_distance_hh_constr_mins if `in_sample', ///
		lcolor(%10) fcolor(%10) yscale(range(-0.005 0.4) alt axis(1))) ///
	(lpolyci caretaker_hh final_distance_hh_constr_mins if `in_sample' [aweight = fu_pweight_e], ///
		yaxis(2) yscale(range(-0.0025 0.2) alt axis(2))), ///
	ytitle("Density", axis(1)) ///
	ytitle("Share of households with caretaker", axis(2)) ///
	xtitle("Walking time from nearest project well (minutes)") ///
	legend(order(3 "Fitted curve" 2 "95% confidence interval" 1 "Density of observations") cols(1) ring(0) bplacement(neast))

graph export "${path_graphs}/caretaker_distance.png", replace


* Shorten variable names where necessary.
* The list below alternates NEW name, ORIGINAL name; each new variable is a
* plain copy of the original, created in the order listed.

local name_pairs ///
	index_as         index_arsenic_impact    ///
	hh_as_any        hh_arsenic_any          ///
	hh_as_50pl       hh_arsenic_50plus       ///
	ws_as_any_wm     ws_arsenic_any_wmean    ///
	ws_as_50pl_wm    ws_arsenic_50plus_wmean ///
	ws_soc_wtp       new_source_soc_wtp      ///
	ws_soc_wtp_time  new_source_soc_wtp_time ///
	res_wage         reservation_wage        ///
	fu_p_score_2usd  fu_poverty_score_2usd   ///
	fu_hh_ass_c1     fu_hh_assets_comp1      ///
	network_nom      network_nominations     ///
	fu_hhh_noedu     fu_hh_head_no_educ

while "`name_pairs'" != "" {
	gettoken newname name_pairs : name_pairs
	gettoken oldname name_pairs : name_pairs
	gen `newname' = `oldname'
}

* Household head cannot read: built from the follow-up string answer,
* left missing when that answer is empty ...
gen fu_hh_head_read_dummy = cond(fu_hh_head_read == "yes", 1, 0) if fu_hh_head_read != ""
gen fu_hhh_noread = 1 - fu_hh_head_read_dummy
* ... and filled in from hh_head_read_dummy where the follow-up value is missing
replace fu_hhh_noread = 1 - hh_head_read_dummy if fu_hhh_noread == .

gen fu_hhh_muslim = fu_hh_head_muslim

* Covariate groups used to characterise caretaker households.
* NOTE(review): the demand group lists hh_arsenic_any rather than the shortened
* copy hh_as_any created earlier in this file. The label globals further down
* are also keyed on hh_arsenic_any, so this looks deliberate - confirm.
global demand "index_as hh_arsenic_any hh_as_50pl ws_as_any_wm ws_as_50pl_wm"
global labour_avail "num_hh_m res_wage value_time_h"
global income "fu_p_score_2usd fu_self_poor fu_hh_ass_c1 fu_self_upper"
global educ "fu_hhh_noedu fu_hhh_noread"
global income_other "fu_savings loan_dummy lend_dummy"
global social "fu_hhh_muslim know_ass_ind v_r_trust_v_much network_nom leader_hh"
global wtp "ws_soc_wtp ws_soc_wtp_time"

* Full covariate list
global select_ctakers "$demand $income $income_other $educ $labour_avail $social $wtp"

* Subset of the full list (the _b suffix presumably marks baseline-sample
* variables - confirm); the wtp group expands to the same two names as before
global select_ctakers_b "$demand $labour_avail know_ass_ind v_r_trust_v_much $wtp"

//Standardise every candidate covariate and demean it within village
//
// For each variable in the global select_ctakers this creates
//   <var>_std    : (x - weighted mean) / sd, using -summarize- with aweights
//   <var>_std_dm : <var>_std minus its weighted mean within village_code_correct
// and appends <var>_std_dm to the global g.
//
// Weighting: variables listed in the global select_ctakers_b use pweight_b,
// all others use fu_pweight_e. The previous version tested a misspelled
// macro (selection_ctakers_b), which is empty, so the pweight_b branch
// was never taken.

global g ""

* copy of the baseline-weighted list for the exact-membership test below
local baseline_vars "${select_ctakers_b}"

foreach var of global select_ctakers {

	* exact list membership, not a regex/substring match, so a name that is
	* merely a prefix of another name cannot match by accident
	local is_baseline : list var in baseline_vars

	if `is_baseline' == 1 {
		global weight pweight_b
	}
	else {
		global weight fu_pweight_e
	}

	* standardise with the weighted mean and sd
	capture drop `var'_std

	sum `var' [aweight = $weight]
	gen `var'_std = (`var' - r(mean))/r(sd)

	* demean within village using the same weight
	* (wtmean is not an official egen function; presumably the user-written
	*  _gwtmean package - confirm it is installed)
	capture drop temp
	capture drop `var'_std_dm
	egen temp = wtmean(`var'_std), weight(${weight}) by(village_code_correct)
	gen `var'_std_dm = `var'_std - temp
	capture drop temp

	global g $g `var'_std_dm
}


//Describe average variables that predict caretakers

* Settings for the pooled heterogeneity do-file (its call is commented out
* below, so these are presumably read only when that line is re-enabled)
global het_outcome "caretaker_hh"
global modellist "treated"
global reps 100
global sample_vars "success_rate panel_final"
global het_sample "success_rate > 0 & panel_final ==1 "
global weight "pweight_b"

* Fixed seeds so that a re-run reproduces the same random draws and sort order
set sortseed 979562748
set seed 1347473771

*do "$path_dofiles/heterogeneity_ML_pooled.do"
*run Friday 2nd August 2024

//Caretakers

* Per-table settings, all prefixed with the table name select_ctakers
global select_ctakers_het_var "caretaker_hh"
global select_ctakers_short "ct"
global select_ctakers_modellist "treated"
global select_ctakers_GATES_ylabel "-0.04(0.02)0.04"

* Same number format for the BLP, GATES and CL outputs
foreach stage in BLP GATES CL {
	global select_ctakers_`stage'_format "%03.2f"
}

* Re-assigns the covariate list to itself (no change in content)
global select_ctakers "${select_ctakers}"

* Tables to build in the loop that follows
global tables "select_ctakers"

* ------------------------------------------------------------------
* Build LaTeX macros, figures and tables for every analysis in the
* global tables.
*
* For each table name g and each model m in the global g_modellist:
*   1. BLP file   : medians of the stored columns -> ATE_* and HET_* globals
*   2. GATES file : medians -> GATE1_*, GATE4_*, GATEd_* globals and a figure
*   3. one file per covariate in the global named g: covariates whose
*      doubled median p-value (column 20) is below 0.1 get CL1_*, CL4_*,
*      CLd_* globals and a sub-figure
*   4. texdoc do-files are run to compile the tables/figures; they
*      presumably read the globals set here (not visible in this file)
*
* Every statistic is the median (r(p50)) of a stored column; p-value
* columns are doubled and, for BLP and GATES, capped at 1.
* ------------------------------------------------------------------
foreach g of global tables {

	* Display labels: lab_<var>1 is the first line, lab_<var>2 the optional second
	global lab_index_as1 "As exposure index"
	global lab_index_as2 "(baseline)"
	global lab_hh_arsenic_any1 "As in drinking water,"
	global lab_hh_arsenic_any2 "(baseline, WHO)"
	global lab_hh_as_50pl1 "As in drinking water"
	global lab_hh_as_50pl2 "(baseline, BGD)"
	global lab_ws_as_any_wm1 "As in water source at" 
	global lab_ws_as_any_wm2 "baseline (WHO)"
	global lab_ws_as_50pl_wm1  "As in water source at" 
	global lab_ws_as_50pl_wm2 "baseline (BGD)"
	
	global lab_ws_soc_wtp1 "WTP for new safe source" 
	global lab_ws_soc_wtp2 "(cash)" 
	global lab_ws_soc_wtp_time1 "WTP for new safe source" 
	global lab_ws_soc_wtp_time2 "(time)" 

	global lab_fu_hhh_noedu1 "Household head uneducated" 
	global lab_fu_hhh_noread1 "Household head illiterate" 
	
	global lab_fu_hh_ass_c11 "Household asset index"
	global lab_fu_p_score_2usd1 "Predicted income" 
	global lab_fu_p_score_2usd2 "$<$ poverty threshold"
	
	global lab_fu_self_poor1 "Self-reported poor" 
	global lab_fu_self_poor2 "" 
	global lab_fu_self_upper1 "Self-reported high-income" 
	global lab_fu_self_upper2 "" 

	global lab_fu_savings1 "Savings"
	global lab_loan_dummy1 "Borrowed money"
	global lab_lend_dummy1 "Lent money"
	
	global lab_fu_hhh_muslim1 "Muslim"

	global lab_leader_hh1 "Identified as"
	global lab_leader_hh2 "community leader"
	
	global lab_know_ass_ind1 "Knows of community"
	global lab_know_ass_ind2 "association"

	global lab_network_nom1 "In-network size"
	global lab_network_nom2 ""

	global lab_res_wage1 "Reservation wage"
	global lab_value_time_h1 "Inferred value of time"	
	
	global lab_num_hh_m1 "Household size"

	foreach m in ${`g'_modellist} {
	
		* one-letter model tag used in the CLAN global names
		local m_short = substr("`m'",1,1)

		* Stored columns come in blocks of four: estimate, lower, upper, p-value.
		* First column of the blocks reported in the tables:
		*   group 1 -> 1, group 4 -> 13, the "d" block -> 17
		*   (presumably d is the group 4 minus group 1 difference - confirm)
		local start_1 = 1
		local start_4 = 13
		local start_d = 17
	
		*BLP Table
			
		use $path_estimates/${`g'_het_var}_`m'_BLP, clear 

		* columns 1-3: ATE estimate and bounds; the unformatted copies
		* (globals ATE, ATE_ll, ATE_ul) are the red reference lines on the GATES figure
		local col = 0
		foreach stat in ATE ATE_ll ATE_ul {
			local ++col
			sum het_`m'_BLP`col', detail 
			local b = r(p50)
			global `stat'_`g'_`m' = strofreal(`b',"${`g'_BLP_format}")
			global `stat' = `b'
		}

		* column 4: ATE p-value, doubled and capped at 1
		sum het_`m'_BLP4, detail 
		local b = min(r(p50) * 2, 1)
	
		if `b' < 0.001 {
			global ATE_p_`g'_`m' =  "\$<\$0.001 "
		}
		else {
			global ATE_p_`g'_`m' = strofreal(`b',"%04.3f")	
		}

		* columns 5-7: heterogeneity coefficient and bounds
		local col = 4
		foreach stat in HET HET_ll HET_ul {
			local ++col
			sum het_`m'_BLP`col', detail 
			local b = r(p50)
			global `stat'_`g'_`m' = strofreal(`b',"${`g'_BLP_format}")
		}

		* column 8: heterogeneity p-value, doubled and capped at 1
		sum het_`m'_BLP8, detail 
		local b = min(r(p50) * 2, 1)
	
		if `b' < 0.001 {
			global HET_p_`g'_`m' =  "\$<\$ 0.001 "
		}
		else {
			global HET_p_`g'_`m' = strofreal(`b',"%04.3f")	
		}
	
		*GATES Table and Figure
	
		use $path_estimates/${`g'_het_var}_`m'_GATES, clear 
	
		*store values for table (do not monotonize - don't know how to do hypothesis tests after monotonization)

		foreach grp in 1 4 d {

			local col = `start_`grp''

			* estimate
			sum het_`m'_GATES`col', detail 
			local b = r(p50)
			global GATE`grp'_`g'_`m' = strofreal(`b',"${`g'_GATES_format}")

			* lower bound
			local ++col
			sum het_`m'_GATES`col', detail 
			local b = r(p50)
			global GATE`grp'_ll_`g'_`m' = strofreal(`b',"${`g'_GATES_format}")

			* upper bound
			local ++col
			sum het_`m'_GATES`col', detail 
			local b = r(p50)
			global GATE`grp'_ul_`g'_`m' = strofreal(`b',"${`g'_GATES_format}")

			* p-value, doubled and capped at 1. All three blocks now use the
			* LaTeX math form for small values (the group 1 block used to
			* write a raw less-than sign, unlike the other two).
			local ++col
			sum het_`m'_GATES`col', detail 
			local b = min(r(p50) * 2, 1)

			if `b' < 0.001 {
				global GATE`grp'_p_`g'_`m' =  "\$<\$0.001 "
			}
			else {
				global GATE`grp'_p_`g'_`m' = strofreal(`b',"%04.3f")	
			}
		}
	
		*GATES figures 
	
		capture drop GATES_mean
		capture drop GATES_ll
		capture drop GATES_ul
	
		gen GATES_mean = .
		gen GATES_ll = .
		gen GATES_ul = .

		* rows 1-4 of the helper variables hold the medians for groups 1-4
		forvalues q = 1(1)4 {
		
			local k = ((`q'-1)*4) + 1
	
			sum het_`m'_GATES`k', detail 
			replace GATES_mean = r(p50) if _n == `q'
		
			local k = `k' + 1
		
			sum het_`m'_GATES`k', detail 
			replace GATES_ll = r(p50) if _n == `q'
		
			local k = `k' + 1
		
			sum het_`m'_GATES`k', detail 
			replace GATES_ul = r(p50) if _n == `q'
		
		}
	
		*impose monotonicity
		* each series (mean, ll, ul) is sorted separately: row q of the _m
		* variable receives the q-th smallest of the four values
	
		foreach p in mean ll ul {
	
			capture drop rank
			egen rank = rank(GATES_`p'), unique
		
			capture drop GATES_`p'_m
			gen GATES_`p'_m = .
				
			forvalues q = 1(1)4 {
			
				capture drop temp*
				gen temp = GATES_`p' if rank == `q'
				egen temp2 = min(temp)
			
				replace GATES_`p'_m = temp2 if _n == `q'
	
			}
		}
	
		capture drop rank temp*
	
	
		gen q = _n if _n <= 4

		* NOTE(review): the local hv_stub is never set in this file, so the
		* graph name is just het_ plus the model name - confirm that is intended
		twoway (scatter GATES_mean_m q, mcolor(black) msymbol(square))  (rcap GATES_ul_m GATES_ll_m q, lcolor(black)), yline(0, lcolor(black)) yline(${ATE}, lcolor(red)) yline(${ATE_ll}, lcolor(red) lpattern(shortdash)) yline(${ATE_ul}, lcolor(red) lpattern(shortdash)) ylabel(${`g'_GATES_ylabel}, labsize(large)) xlabel(, labsize(large)) xsize(2) ysize(1.5)  xtitle("") ytitle("") legend(off) name(het_`m'`hv_stub', replace)
		graph export "${path_graphs}/het_`g'_`m'.png", replace
	
		*selection table 
				
		local n_selected = 0
		local vs_selected = ""
		local CLAN_fig_list = ""

		foreach v of global `g' {
	
			use $path_estimates/${`g'_het_var}_`m'_`v', clear

			* p-value column (20) may be stored under the plain name or with
			* a _std suffix. Check explicitly which one exists instead of
			* relying on what r() holds after a failed summarize; when both
			* exist the _std version wins (as before), and when neither
			* exists p stays missing so the covariate is not selected.
			local p = .
			foreach cand in het_`m'_`v'20 het_`m'_`v'_std20 {
				capture confirm variable `cand', exact
				if _rc == 0 {
					sum `cand', detail
					local p = r(p50) * 2
				}
			}

			* keep covariates whose doubled median p-value is below 10 percent
			if `p' < 0.1 {
	
				local n_selected = `n_selected' + 1
	
				local v_selected_`n_selected' = "`v'"
		
				local vs_selected = "`vs_selected' " + "`v'"

				* estimate and bounds for group 1, group 4 and the d block
				foreach grp in 1 4 d {

					local col = `start_`grp''

					sum het_`m'_`v'`col', detail
					local b = r(p50)
					global CL`grp'_${`g'_short}_`v'_`m_short' = strofreal(`b',"${`g'_CL_format}")

					local ++col
					sum het_`m'_`v'`col', detail
					local b = r(p50)
					global CL`grp'_ll_${`g'_short}_`v'_`m_short' = strofreal(`b',"${`g'_CL_format}")

					local ++col
					sum het_`m'_`v'`col', detail
					local b = r(p50)
					global CL`grp'_ul_${`g'_short}_`v'_`m_short' = strofreal(`b',"${`g'_CL_format}")
				}

				* p-value of the d block (not capped: it is below 0.1 here)
				if `p' < 0.001 {
					global CLd_p_${`g'_short}_`v'_`m_short' =  "\$ <\$0.001 "
				}
				else {
					global CLd_p_${`g'_short}_`v'_`m_short' = strofreal(`p',"%04.3f")	
				}	

				* NOTE(review): no local named l_ plus the covariate name is set
				* anywhere in this file, so this stores an empty string - confirm
				global l_`v' "`l_`v''"
				
				*figures 

				capture drop CLAN_mean
				capture drop CLAN_ll
				capture drop CLAN_ul

				gen CLAN_mean = .
				gen CLAN_ll = .
				gen CLAN_ul = .
		
				forvalues q = 1(1)4 {
	
					local k = ((`q'-1)*4) + 1

					sum het_`m'_`v'`k', detail 
					replace CLAN_mean = r(p50) if _n == `q'
	
					local k = `k' + 1
	
					sum het_`m'_`v'`k', detail 
					replace CLAN_ll = r(p50) if _n == `q'
	
					local k = `k' + 1
	
					sum het_`m'_`v'`k', detail 
					replace CLAN_ul = r(p50) if _n == `q'
	
				}		

				gen q = _n if _n <= 4

				* NOTE(review): no CLAN_ylabel global is defined in this file, so
				* ylabel() receives only the labsize option - confirm
				twoway (scatter CLAN_mean q, mcolor(black) msymbol(square) msize(large))  (rcap CLAN_ul CLAN_ll q, lcolor(black) lwidth(medthick)), yline(0, lcolor(black)) ylabel(${`g'_CLAN_ylabel}, labsize(huge)) xlabel(,labsize(huge)) legend(off) xsize(2) ysize(1.5) ytitle("") xtitle("") name(het_`m'_`v', replace)
				graph export "${path_graphs}/subfigs/het_`g'_t_`v'.png", replace

				* LaTeX subfloat snippet for this covariate.
				* NOTE(review): the file name hard-codes _t_ while the global name
				* uses a local t that is never set here (so it expands to nothing);
				* left unchanged because the texdoc do-files that read it are not visible
				global het_${`g'_short}_`t'_`v' = "\subfloat[{\footnotesize ${lab_`v'1} ${lab_`v'2}}]{\label{fig:CLAN_`g'_t_`v'}\centering\includegraphics[width=0.25\linewidth]{graphs/subfigs/het_`g'_t_`v'.png}}"		

			}
	

		}
		
		*compile CLAN table
		* hand the loop state to the texdoc do-files through globals
		* (g temporarily holds the short table name here)

		global g "${`g'_short}"
		global hv_stub "`hv_stub'"
		global pair "`m'"
		global vs_selected "`vs_selected'"
		global n_selected `n_selected'
		global s "`t'"

		texdoc do "${path_dofiles}/table_CLAN_ave.do"
		
		texdoc do "${path_dofiles}/fig_CLAN_ave.do" 						
	
	}
	
	
	
	*compile tables
	* g is reset to the full table name for the BLP and GATES tables
	
	global g `g'
	
	texdoc do "${path_dofiles}/table_BLP_ave.do"	
	texdoc do "${path_dofiles}/table_GATES_ave.do"

}


//the end 
 